from io import open

import scrapy

'''
Extract HTML content and write it to a local JSON file.
Running `scrapy crawl quotes_extracting -o quotes.json` twice will corrupt the JSON structure.
Use `scrapy crawl quotes_extracting -o quotes_jl.jl` (JSON Lines) instead.
'''
class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quote text, author and tags from quotes.toscrape.com."""

    name = 'quotes_extracting'
    start_urls = [
        'http://quotes.toscrape.com/page/1/',
        'http://quotes.toscrape.com/page/2/',
    ]

    def parse(self, response):
        """Yield one dict per quote block found on the page."""
        for quote_sel in response.css('div.quote'):
            item = {
                'text': quote_sel.css('span.text::text').get(),
                'author': quote_sel.css('small.author::text').get(),
                'tags': quote_sel.css('div.tags a.tag::text').getall(),
            }
            yield item
